This file documents our reanalysis of the dataset used to examine biodiversity and conflict relationships in the Southern Philippines presented in this paper https://doi.org/10.1038/s44185-024-00044-8

Load required packages

#make sure to install all packages including required dependencies prior to use
library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(rgdal)
library(raster)
library(lme4)
library(MASS)
library(piecewiseSEM)

Prepare datasets

#biodiversity data was obtained from here https://doi.org/10.15468/rtedgk
#and conflict data was from here https://data.humdata.org/dataset/ucdp-data-for-philippines?
#note that only a subset of this dataset (i.e., those from Mindanao and adjacent islands) was used

#biodiversity data
bio <- read.csv("mobios_tab.csv", header = TRUE, sep = ",")
#subset data
bio.sub <- subset(bio, select = c(12, 23, 20, 15, 16))
bio.sub <- bio.sub %>% filter(county!="") #remove rows with no county information
bio.sub <- bio.sub %>% filter(class!="Malacostraca") #exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Bivalvia") #exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Gastropoda")#exclude this taxon
#check province name
#there are rows where the name of the province is uncertain as indicated by "/"
#"Zamboanga del Norte" was changed to "Zamboanga del Norte" prior to import of data
unique(bio.sub$county)
##  [1] "Lanao del Norte"                                      
##  [2] "South Cotabato"                                       
##  [3] "North Cotabato"                                       
##  [4] "Camiguin Island"                                      
##  [5] "Maguindanao"                                          
##  [6] "Agusan del Sur"                                       
##  [7] "Bukidnon"                                             
##  [8] "Davao Oriental"                                       
##  [9] "Davao"                                                
## [10] "Misamis Occidental"                                   
## [11] "Surigao del Sur"                                      
## [12] "Davao del sur"                                        
## [13] "Lanao del Sur"                                        
## [14] "Sarangani"                                            
## [15] "Misamis Oriental"                                     
## [16] "Surigao del Norte"                                    
## [17] "Davao de Oro"                                         
## [18] "Davao del Norte"                                      
## [19] "Zamboanga del Sur"                                    
## [20] "Surigao del Norte/Agusan del Sur"                     
## [21] "Dinagat Island"                                       
## [22] "Basilan"                                              
## [23] "Tawi-Tawi"                                            
## [24] "Sulu"                                                 
## [25] "Sultan Kudarat"                                       
## [26] "Agusan del Norte"                                     
## [27] "Cagayan de Oro"                                       
## [28] "Bukidnon/Camiguin"                                    
## [29] "Bukidnon/Misamis Oriental/Iligan"                     
## [30] "Zamboanga del Norte"                                  
## [31] "Zamboanga Sibugay"                                    
## [32] "Misamis Occidental/Misamis Oriental/Camiguin/Bukidnon"
## [33] "Zamboanga City"                                       
## [34] "General Santos"                                       
## [35] "North Cotabato/Davao City"
#remove uncertain province names
bio.sub <- bio.sub %>% filter(!grepl('/', county))

head(bio.sub)
#conflict data
con <- read.csv("conflict_tab_89-21.csv", header = TRUE, sep = ",")
con.sub <- subset(con, select = c(1,3,15,18,30,32,33))

head(con.sub)
write.csv(con.sub, "con.sub.csv")

Count recorded species per taxon in each point and within province

#the methods of how species counts were done in each site was not provided in the paper
#according to the paper analysis was done at the provincial level
#there could be two different ways how the species counts were done (see figure below)
#this part is extremely important which the authors failed to discuss
#the left panel of the figure shows that species counts were aggregated at the provincial level 
#each point (within the province) receives the same value of species count (or species richness)
#the right panel shows that each point has a unique number of species count
#note that these values are crucial for other downstream analyses 
#particularly when relationships of species counts 
#and distance to the nearest conflict sites are considered
#we generated the values for these two scenarios below 
#and test whether any of them would match the results presented in the paper

knitr::include_graphics("bioconflict.png")

#count unique records (i.e., species richness) per point
bio.data.sum <- bio.sub %>%
              group_by(county, class, decimalLatitude, decimalLongitude) %>%
              mutate(NumbSp = n_distinct(scientificName))

head(bio.data.sum)
#export data
write.csv(bio.data.sum, "bio.data.sum.csv")

#count species records per province (this lumps all the data, 
#thus each point will have a single species richness value)
prov.bio.data.sum <- bio.sub %>%
                          group_by(county, class) %>%
                          mutate(NumbSp = n_distinct(scientificName))
head(prov.bio.data.sum)
#export data
write.csv(prov.bio.data.sum, "prov.data.sum.csv")

Calculate the distance of species record to the nearest conflict site/s using QGIS

#in QGIS, import the "bio.data.sum" and "con.sub" from R as delimited text files. 
#export these files as GeoPackage or Shapefile to convert the data to meters prior to calculation
#use the CRS UTM Zone 51N
knitr::include_graphics("qgis_geo.png")

knitr::include_graphics("qgis_utm.png")

#import back the newly saved data to QGIS
#get the distance of nearest conflict areas using the "Joint attributes by nearest" plugin
#QGIS > Processing Toolbox > Join attributes by nearest
knitr::include_graphics("qgis_join.png")

#distance can be calculated as well without joining the attributes using the "Distance to nearest hub"
#after calculating the distance, a new layer will be created. 
#the new layer contains the calculated distance in the attribute table
#this also includes the x and y coordinates of the nearest site/point
#then export this new layer as a csv file so we can use the data in R for other analysis
#repeat the entire process for provincial data (i.e., prov.bio.data.sum)
#figure below shows the nearest conflict point to species record (black line)
#notice the remainder of conflict points are not included
knitr::include_graphics("hub.png")

Calculate the average distance

#species richness per point in each province
#read the file with calculated distance from QGIS
bio.con.dist <- read.csv("bio.con.dist.csv", header = TRUE, sep = ",")
#get average distance by province for each taxon
mean.dist <- bio.con.dist %>%
                group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
                summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
head(mean.dist)
write.csv(mean.dist, "mean.distance.csv")

#species richness lumped per province
#read the file with calculated distance in QGIS
prov.bio.con.dist <- read.csv("prov.bio.con.dist.csv", header = TRUE, sep = ",")
#get average distance by province for each taxon
prov.mean.dist <- prov.bio.con.dist %>%
                group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
                summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
write.csv(prov.mean.dist, "prov.mean.distance.csv")

#get average distance by province considering long/lat data (i.e., lumped average)
prov.lmp.dist <- prov.bio.con.dist %>%
                group_by(county, class, NumbSp) %>%
                summarize(lmpMeanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class'. You can override using
## the `.groups` argument.
write.csv(prov.lmp.dist, "prov.lmp.distance.csv")

Plot species record vs average distance

key.lab <- unique(mean.dist$class)
plot.color <- c("#7b3dcc", "#cc3d3d", "#000000", "#d970cb", "#8c994d", "#7f404a","#67E3FC")
point.shape <-  c(0,1,2,3,4,5,6,7,8,9,10)

#plot data with species richness per point
p1 <- ggplot(mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank()) +
            ylab("number of species/record") + 
            xlab("average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#plot data with lumped species richness
p2 <- ggplot(prov.mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank()) +
            ylab("number of species/record") + 
            xlab("average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)

ggarrange(p1,p2, nrow = 1, ncol = 2, common.legend = TRUE, align = "hv",  widths = 20, heights = 5)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

tiff("sp_dist.tif", res=300, width = 7, height = 7, unit="in") 
ggplot(mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
            theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"), 
              legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15), 
              legend.key=element_blank(), legend.position = c(0.83, 0.85), 
              legend.background=element_blank()) +
            ylab("Number of species record") + 
            xlab("Average distance (m)") +
            geom_point(size = 3, stroke = 0.6) + 
            geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
            scale_color_manual(name = "", labels = key.lab, values = plot.color) +
            scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
dev.off()
## quartz_off_screen 
##                 2

Get number of conflict sites per province

#we are not sure how the frequency of conflicts were scored in the paper as it was not mentioned
#so two ways can be considered here
#first, we can get frequency based on the number of unique conflict points within each province
#i.e., per point (unique latitude/longitude)
con.sub.2 <- con.sub
con.sub.2 <- con.sub.2 %>% filter(PROVINCE!="") #filter empty rows

con.freq <- con.sub.2 %>%
              group_by(PROVINCE) %>%
              mutate(ConflicFreq = n_distinct(latitude, longitude)) #use only the unique latitude
con.freq <- con.freq %>% distinct(PROVINCE, .keep_all = TRUE)
write.csv(con.freq, "conflict.frequency.unique.csv")

#second, count each row as a unique report of conflict regardless of coordinates
con.freq2 <- con.sub.2 %>%
              group_by(PROVINCE) %>%
              count()
write.csv(con.freq2, "conflict.frequency.rows.csv")

ggplot(data=con.freq2, aes(x=PROVINCE, y=n)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

#combine biodiversity and conflict data (lumped)
prov.comb <- prov.lmp.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
    if_else(county == "Agusan del Sur", 85,
    if_else(county == "Basilan", 329,
    if_else(county == "Bukidnon", 82,
    if_else(county == "Cagayan de Oro", 0,
    if_else(county == "Camiguin Island", 0,
    if_else(county == "Cotabato", 221,
    if_else(county == "Davao", 0,
    if_else(county == "Davao Oriental", 52,
    if_else(county == "Davao de Oro", 113,
    if_else(county == "Davao del Norte", 56, 
    if_else(county == "Davao del sur", 181,
    if_else(county == "Dinagat Island", 0,
    if_else(county == "General Santos", 0, 
    if_else(county == "Lanao del Norte", 66,
    if_else(county == "Lanao del Sur", 181,
    if_else(county == "Maguindanao", 352, 
    if_else(county == "Misamis Occidental", 15,
    if_else(county == "Misamis Oriental", 38,
    if_else(county == "North Cotabato", 0,
    if_else(county == "Sarangani", 25,
    if_else(county == "South Cotabato", 55,
    if_else(county == "Sultan Kudarat", 62,
    if_else(county == "Sulu", 393,
    if_else(county == "Surigao del Norte", 29,
    if_else(county == "Surigao del Sur", 72,
    if_else(county == "Tawi-Tawi", 17,
    if_else(county == "Zamboanga City", 0,
    if_else(county == "Zamboanga Sibugay", 18,
    if_else(county == "Zamboanga del Norte", 34,
    if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(prov.comb, "prov.lmp.comb.csv")

#combine biodiversity and conflict data (per point)
perpoint.comb <- mean.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
    if_else(county == "Agusan del Sur", 85,
    if_else(county == "Basilan", 329,
    if_else(county == "Bukidnon", 82,
    if_else(county == "Cagayan de Oro", 0,
    if_else(county == "Camiguin Island", 0,
    if_else(county == "Cotabato", 221,
    if_else(county == "Davao", 0,
    if_else(county == "Davao Oriental", 52,
    if_else(county == "Davao de Oro", 113,
    if_else(county == "Davao del Norte", 56, 
    if_else(county == "Davao del sur", 181,
    if_else(county == "Dinagat Island", 0,
    if_else(county == "General Santos", 0, 
    if_else(county == "Lanao del Norte", 66,
    if_else(county == "Lanao del Sur", 181,
    if_else(county == "Maguindanao", 352, 
    if_else(county == "Misamis Occidental", 15,
    if_else(county == "Misamis Oriental", 38,
    if_else(county == "North Cotabato", 0,
    if_else(county == "Sarangani", 25,
    if_else(county == "South Cotabato", 55,
    if_else(county == "Sultan Kudarat", 62,
    if_else(county == "Sulu", 393,
    if_else(county == "Surigao del Norte", 29,
    if_else(county == "Surigao del Sur", 72,
    if_else(county == "Tawi-Tawi", 17,
    if_else(county == "Zamboanga City", 0,
    if_else(county == "Zamboanga Sibugay", 18,
    if_else(county == "Zamboanga del Norte", 34,
    if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(perpoint.comb, "perpoint.comb.csv")

Perform GLM

#glm using lumped species richness per province
#assign taxa as a factor
prov.comb$class <- factor(prov.comb$class)

#plot data
h1 <- ggplot(prov.comb, aes(x = lmpMeanDistance, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
h2 <- ggplot(prov.comb, aes(x = conflictFreq, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
ggarrange(h1, h2, nrow = 1, ncol = 2, align = "hv")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#perform glm
glm.res.1 <- glm(NumbSp ~ conflictFreq + lmpMeanDistance + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.2 <- glm(NumbSp ~ conflictFreq + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.3 <- glm(NumbSp ~ lmpMeanDistance + class, 
                 family="poisson"(link="log"), data = prov.comb)
glm.res.4 <- glm(NumbSp ~ conflictFreq + lmpMeanDistance, 
                 family="poisson"(link="log"), data = prov.comb)

#compare glm results
AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
stepAIC(glm.res.1, direction='both') #use stepAIC function
## Start:  AIC=3641.28
## NumbSp ~ conflictFreq + lmpMeanDistance + class
## 
##                   Df Deviance    AIC
## <none>                 3049.5 3641.3
## - lmpMeanDistance  1   3053.2 3642.9
## - conflictFreq     1   3071.8 3661.5
## - class            6   5655.0 6234.8
## 
## Call:  glm(formula = NumbSp ~ conflictFreq + lmpMeanDistance + class, 
##     family = poisson(link = "log"), data = prov.comb)
## 
## Coefficients:
##     (Intercept)     conflictFreq  lmpMeanDistance    classAmphibia  
##       3.074e+00       -8.597e-04       -3.262e-06        3.833e-01  
##  classArachnida        classAves     classInsecta    classMammalia  
##       4.316e-01        1.322e+00        1.764e+00       -2.183e-01  
##   classReptilia  
##       7.977e-01  
## 
## Degrees of Freedom: 111 Total (i.e. Null);  103 Residual
## Null Deviance:       5683 
## Residual Deviance: 3050  AIC: 3641
#get summary for best model
summary(glm.res.1)
## 
## Call:
## glm(formula = NumbSp ~ conflictFreq + lmpMeanDistance + class, 
##     family = poisson(link = "log"), data = prov.comb)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.074e+00  6.333e-02  48.537  < 2e-16 ***
## conflictFreq    -8.597e-04  1.863e-04  -4.614 3.94e-06 ***
## lmpMeanDistance -3.262e-06  1.724e-06  -1.892  0.05844 .  
## classAmphibia    3.833e-01  7.914e-02   4.844 1.27e-06 ***
## classArachnida   4.316e-01  7.608e-02   5.672 1.41e-08 ***
## classAves        1.322e+00  6.632e-02  19.938  < 2e-16 ***
## classInsecta     1.764e+00  6.359e-02  27.740  < 2e-16 ***
## classMammalia   -2.183e-01  8.308e-02  -2.628  0.00859 ** 
## classReptilia    7.977e-01  7.235e-02  11.026  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 5683.2  on 111  degrees of freedom
## Residual deviance: 3049.5  on 103  degrees of freedom
## AIC: 3641.3
## 
## Number of Fisher Scoring iterations: 5
#glm with taxa as a random variable
pred.scale <- scale(prov.comb[4:5]) #scale predictors
prov.comb.s <- cbind(prov.comb[2:3], pred.scale)

glm.res.5 <- glmer(NumbSp ~ conflictFreq + lmpMeanDistance + (1|class), 
                 family = "poisson"(link ="log"), data = prov.comb.s)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: NumbSp ~ conflictFreq + lmpMeanDistance + (1 | class)
##    Data: prov.comb.s
## 
##      AIC      BIC   logLik deviance df.resid 
##   3677.2   3688.0  -1834.6   3669.2      108 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -10.3473  -3.2751  -0.6575   2.1315  19.4747 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  class  (Intercept) 0.4293   0.6552  
## Number of obs: 112, groups:  class, 7
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.63422    0.24824  14.640  < 2e-16 ***
## conflictFreq    -0.05887    0.01278  -4.607 4.09e-06 ***
## lmpMeanDistance -0.02611    0.01374  -1.901   0.0573 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cnflcF
## conflictFrq 0.003        
## lmpMenDstnc 0.001  0.232
#get R^2
rsquared(glm.res.5, method="trigamma")
#use data per point
#assign taxa as a factor
perpoint.comb$class <- factor(perpoint.comb$class)

#plot data
h1 <- ggplot(perpoint.comb, aes(x = meanDistance, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
h2 <- ggplot(perpoint.comb, aes(x = conflictFreq, y = NumbSp)) + 
        geom_point(pch = 1) + geom_smooth(method = lm)
ggarrange(h1, h2, nrow = 1, ncol = 2, align = "hv")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#perform glm
glm.res.1 <- glm(NumbSp ~ conflictFreq + meanDistance + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.2 <- glm(NumbSp ~ conflictFreq + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.3 <- glm(NumbSp ~ meanDistance + class, 
                 family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.4 <- glm(NumbSp ~ conflictFreq + meanDistance, 
                 family = "poisson"(link ="log"), data = perpoint.comb)

#compare glm results
AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
stepAIC(glm.res.1, direction = 'both') #use stepAIC function
## Start:  AIC=11623.99
## NumbSp ~ conflictFreq + meanDistance + class
## 
##                Df Deviance   AIC
## <none>              9700.3 11624
## - meanDistance  1   9708.8 11630
## - conflictFreq  1   9720.2 11642
## - class         6  12507.7 14419
## 
## Call:  glm(formula = NumbSp ~ conflictFreq + meanDistance + class, family = poisson(link = "log"), 
##     data = perpoint.comb)
## 
## Coefficients:
##    (Intercept)    conflictFreq    meanDistance   classAmphibia  classArachnida  
##      2.480e+00      -8.031e-04       3.921e-06       3.385e-01      -7.199e-02  
##      classAves    classInsecta   classMammalia   classReptilia  
##      1.077e+00       1.052e+00      -6.087e-01       4.347e-01  
## 
## Degrees of Freedom: 457 Total (i.e. Null);  449 Residual
## Null Deviance:       12510 
## Residual Deviance: 9700  AIC: 11620
#get summary for best model
summary(glm.res.1)
## 
## Call:
## glm(formula = NumbSp ~ conflictFreq + meanDistance + class, family = poisson(link = "log"), 
##     data = perpoint.comb)
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     2.480e+00  5.162e-02  48.047  < 2e-16 ***
## conflictFreq   -8.031e-04  1.830e-04  -4.388 1.15e-05 ***
## meanDistance    3.921e-06  1.334e-06   2.940  0.00328 ** 
## classAmphibia   3.385e-01  5.931e-02   5.707 1.15e-08 ***
## classArachnida -7.199e-02  6.144e-02  -1.172  0.24131    
## classAves       1.077e+00  5.243e-02  20.549  < 2e-16 ***
## classInsecta    1.052e+00  5.148e-02  20.435  < 2e-16 ***
## classMammalia  -6.087e-01  6.712e-02  -9.069  < 2e-16 ***
## classReptilia   4.347e-01  5.894e-02   7.377 1.62e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 12508.2  on 457  degrees of freedom
## Residual deviance:  9700.3  on 449  degrees of freedom
## AIC: 11624
## 
## Number of Fisher Scoring iterations: 6
#glm with taxa as a random variable
pred.scale <- scale(perpoint.comb[6:7]) #scale predictors
perpoint.comb.s <- cbind(perpoint.comb[2:3], pred.scale)

glm.res.5 <- glmer(NumbSp ~ conflictFreq + meanDistance + (1|class), 
                 family = "poisson"(link = "log"), data = perpoint.comb.s)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: NumbSp ~ conflictFreq + meanDistance + (1 | class)
##    Data: perpoint.comb.s
## 
##      AIC      BIC   logLik deviance df.resid 
##  11661.4  11677.9  -5826.7  11653.4      454 
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -5.855 -2.960 -1.304  1.059 34.864 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  class  (Intercept) 0.3195   0.5652  
## Number of obs: 458, groups:  class, 7
## 
## Fixed effects:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.77203    0.21404  12.951  < 2e-16 ***
## conflictFreq -0.04652    0.01062  -4.379 1.19e-05 ***
## meanDistance  0.03037    0.01034   2.938  0.00331 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cnflcF
## conflictFrq  0.003       
## meanDistanc -0.002  0.218
#get R^2
rsquared(glm.res.5, method = "trigamma")

Calculate the proportion of species occurrence records and conflict sites positioned in different land cover types

#here we calculate the number of sites found in different land cover types 
#this demonstrates the artifact of sampling, which was not considered in the paper
#land cover data of 1988 was from Swedish Space Corporation (SSC 1988)
#retrieved from https://data.humdata.org/dataset/29a3760f-3170-4555-b5d7-1fbd6cfb5a69?force_layout=desktop
#land cover of 2020 was from the Philippine National Mapping and Resource Information Authority
#data was retrieved from geoportal PH (https://www.geoportal.gov.ph/)
#we used land cover as a proxy for tree density and forest canopy data 
#the national land cover data is ground-thruthed
#this is only for simplistic illustration of sampling artifact
#the original land cover format is shapefile
#we converted it to raster file for fast data processing
#data conversion was done in QGIS using the plugin "rasterize" 
#raster pixel size is 0.0008 or approximately 100 meters
#all raster files have the uniform coordinate reference system, resolution and extent

#here is the 2020 land cover of Mindanao
knitr::include_graphics("lc_2020.png")

#read data
files <- list.files("land cover/", pattern = "tif", full.names=TRUE)
raster.files <- lapply(files, raster)

raster.files
## [[1]]
## class      : RasterLayer 
## dimensions : 7356, 9030, 66424680  (nrow, ncol, ncell)
## resolution : 8e-04, 8e-04  (x, y)
## extent     : 119.381, 126.605, 4.586858, 10.47166  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : 1988_lc_mindanao_rast100m.tif 
## names      : X1988_lc_mindanao_rast100m 
## 
## 
## [[2]]
## class      : RasterLayer 
## dimensions : 7356, 9030, 66424680  (nrow, ncol, ncell)
## resolution : 8e-04, 8e-04  (x, y)
## extent     : 119.381, 126.605, 4.586858, 10.47166  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs 
## source     : 2020_lc_mindanao_rast100m.tif 
## names      : X2020_lc_mindanao_rast100m
#resample raster files to have a uniform dimension/resolution
standard <- raster.files[[1]]
raster.resamp <- list(standard)
for (i in 2:length(raster.files)) {
  raster.resamp[[i]] <- resample(raster.files[[i]], standard,
                                 method='bilinear')}
#stack raster files
lc <- stack(raster.files)

#plot data
plot(lc$X1988_lc_mindanao_rast100m)
points(con.sub$longitude, con.sub$latitude)

con.coor <- cbind(con.sub$longitude, con.sub$latitude)

#extract data from land cover using conflict data coordinates
con.lc.data <- extract(lc, con.coor)
colnames(con.lc.data)[1] <- "LC_1988"
colnames(con.lc.data)[2] <- "LC_2020"
con.lc.data <- as.data.frame(na.omit(con.lc.data))
head(con.lc.data)
#land cover types code
lc.88.d <- read.csv("1988_lc_mindanao_code.csv")
lc.88.d
lc.20.d <- read.csv("2020_lc_mindanao_code.csv")
lc.20.d
#add land cover descriptions
con.lc.data.m1 <- con.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 4, "Lake", 
    if_else(LC_1988 == 5,"Cultivated Area mixed with brushland/grassland",  
    if_else(LC_1988 == 7, "Built-up Area",
    if_else(LC_1988 == 13, "Quarry",
    if_else(LC_1988 == 15, "Unclassified",
    if_else(LC_1988 == 37, "Coconut plantations",
    if_else(LC_1988 == 42, "Open Canopy",
    if_else(LC_1988 == 154, "Crop land mixed with coconut plantation",
    if_else(LC_1988 == 487, "Arable land, crops mainly cereals and sugar",
    if_else(LC_1988 == 504, "Coral Reef",
    if_else(LC_1988 == 557, "Riverbeds", 
    if_else(LC_1988 == 667, "Open/Siltation pattern in lake",
    if_else(LC_1988 == 683, "Mangrove vegetation",
    if_else(LC_1988 == 706, "Fishponds derived from mangrove", 
    if_else(LC_1988 == 728, "Marshy area and swamp",
    if_else(LC_1988 == 746, "Closed Canopy",
    if_else(LC_1988 == 756, "Crop land mixed with other plantation",
    if_else(LC_1988 == 776, "Other plantations",
    if_else(LC_1988 == 840, "Grassland, grass covering > 70 percent",
    if_else(LC_1988 == 867, "Open Canopy", "No data")))))))))))))))))))))

con.lc.data.m2 <- con.lc.data %>% mutate(con.lc.data.m1, class_2020 = if_else(LC_2020 == 1, "Grassland", 
    if_else(LC_2020 == 2,"Annual Crop",  
    if_else(LC_2020 == 3, "Marshland/Swamp",
    if_else(LC_2020 == 4, "Perennial Crop",
    if_else(LC_2020 == 5, "Built-up",
    if_else(LC_2020 == 6, "Fishpond",
    if_else(LC_2020 == 7, "Inland Water",
    if_else(LC_2020 == 8, "Mangrove Forest",
    if_else(LC_2020 == 9, "Brush/Shrubs",
    if_else(LC_2020 == 10, "Closed Forest",
    if_else(LC_2020 == 11, "Open Forest", 
    if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))

#count the number of conflict sites in each land cover type
#1988 data
con.lc.count.88 <- con.lc.data.m2 %>%
                      group_by(class_1988) %>%
                      summarise(n())
colnames(con.lc.count.88)[2] <- "count"

ggplot(data=con.lc.count.88, aes(x=class_1988, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

con.lc.count.88 <- as.data.frame(con.lc.count.88)
write.csv(con.lc.count.88, "con.lc.count.88.csv")

#2020 data
con.lc.count.20 <- con.lc.data.m2 %>%
                      group_by(class_2020) %>%
                      summarise(n())
colnames(con.lc.count.20)[2] <- "count"

ggplot(data=con.lc.count.20, aes(x=class_2020, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

con.lc.count.20 <- as.data.frame(con.lc.count.20)
write.csv(con.lc.count.20, "con.lc.count.20.csv")

#extract data from land cover using biodiversity data coordinates
bio.coor <- cbind(bio.sub$decimalLongitude, bio.sub$decimalLatitude)
bio.lc.data <- extract(lc, bio.coor)
colnames(bio.lc.data )[1] <- "LC_1988"
colnames(bio.lc.data )[2] <- "LC_2020"
bio.lc.data <- as.data.frame(na.omit(bio.lc.data ))
head(bio.lc.data)
#add land cover descriptions
bio.lc.data.m1 <- bio.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 4, "Lake", 
    if_else(LC_1988 == 5,"Cultivated Area mixed with brushland/grassland",  
    if_else(LC_1988 == 7, "Built-up Area",
    if_else(LC_1988 == 13, "Quarry",
    if_else(LC_1988 == 15, "Unclassified",
    if_else(LC_1988 == 37, "Coconut plantations",
    if_else(LC_1988 == 42, "Open Canopy",
    if_else(LC_1988 == 154, "Crop land mixed with coconut plantation",
    if_else(LC_1988 == 487, "Arable land, crops mainly cereals and sugar",
    if_else(LC_1988 == 504, "Coral Reef",
    if_else(LC_1988 == 557, "Riverbeds", 
    if_else(LC_1988 == 667, "Open/Siltation pattern in lake",
    if_else(LC_1988 == 683, "Mangrove vegetation",
    if_else(LC_1988 == 706, "Fishponds derived from mangrove", 
    if_else(LC_1988 == 728, "Marshy area and swamp",
    if_else(LC_1988 == 746, "Closed Canopy",
    if_else(LC_1988 == 756, "Crop land mixed with other plantation",
    if_else(LC_1988 == 776, "Other plantations",
    if_else(LC_1988 == 840, "Grassland, grass covering > 70 percent",
    if_else(LC_1988 == 867, "Open Canopy", "No data")))))))))))))))))))))

bio.lc.data.m2<- bio.lc.data %>% mutate(bio.lc.data.m1, class_2020 = if_else(LC_2020 == 1, "Grassland", 
    if_else(LC_2020 == 2,"Annual Crop",  
    if_else(LC_2020 == 3, "Marshland/Swamp",
    if_else(LC_2020 == 4, "Perennial Crop",
    if_else(LC_2020 == 5, "Built-up",
    if_else(LC_2020 == 6, "Fishpond",
    if_else(LC_2020 == 7, "Inland Water",
    if_else(LC_2020 == 8, "Mangrove Forest",
    if_else(LC_2020 == 9, "Brush/Shrubs",
    if_else(LC_2020 == 10, "Closed Forest",
    if_else(LC_2020 == 11, "Open Forest", 
    if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))

#count the number of conflict sites in each land cover type
#1988 data
bio.lc.count.88 <- bio.lc.data.m2 %>%
                      group_by(class_1988) %>%
                      summarise(n())
colnames(bio.lc.count.88)[2] <- "count"

ggplot(data=bio.lc.count.88, aes(x=class_1988, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

bio.lc.count.88 <- as.data.frame(bio.lc.count.88)
write.csv(bio.lc.count.88, "bio.lc.count.88.csv")

#2020 data
bio.lc.count.20 <- bio.lc.data.m2 %>%
                      group_by(class_2020) %>%
                      summarise(n())
colnames(bio.lc.count.20)[2] <- "count"

bio.lc.count.20 <- as.data.frame(bio.lc.count.20)

ggplot(data=bio.lc.count.20, aes(x=class_2020, y=count)) +
  geom_bar(stat="identity", fill="#5794db") +
  guides(x =  guide_axis(angle = 75)) 

bio.lc.count.20 <- as.data.frame(bio.lc.count.20)
write.csv(bio.lc.count.20, "bio.lc.count.20.csv")